* ---------------------------------------------------------------------------
* Session setup: directory globals, log file, working directory, maxvar.
* ---------------------------------------------------------------------------
clear all
set more off

* Directory globals referenced throughout the script.
global data    "R:\SharedProjects\Shared2020-070\2016\extend_to_2020\JPE_Replication_dta"
global in      "R:\SharedProjects\Shared2020-070\2016\input"
global rand    "R:\Public\Contributions\Rand\RandHrs\stata"
* Files that HRS overwrites as new editions come out; saved in a special directory.
global data_ow "R:\SharedProjects\Shared2020-070\2016\output\rev2"
global figures "R:\SharedProjects\Shared2020-070\2016\extend_to_2020\JPE_Replication_log"

* Close any log left open by an earlier run, then start a fresh text log.
capture log close
log using "$figures\C_file", replace text

cd "$data"

* maxvar is raised while no data are in memory.
clear
set maxvar 30000


* Person-level demographics from the tracker file
* (this is the HRS-overwritten trk2020tr_r), keyed on a numeric hhidpn.
use "$data_ow\tracker", clear
rename *, lower
keep hhid pn degree gender study exdeathmo exdeathyr

* hhidpn is the concatenation of hhid and pn, converted to a number.
egen hhidpn = concat(hhid pn)
destring hhidpn, replace

sort hhidpn
save "$data\track_demogs", replace

* Yearly SSI receipt indicator from the SSA administrative SSI file.
* Result: one row per hhidpn-year with receive_ssi_admin (0/1), saved as
* receive_ssi_ssa in the data directory.
use "R:\Restricted\SSA Administrative Data\Other\SSI\stata\ssib_r.dta", clear

* Row total of the twelve FEDPMT fields; rsum treats missing as zero.
* Computed before the rename because the names are upper case in the raw file.
egen rrr = rsum(FEDPMT01 FEDPMT02 FEDPMT03 FEDPMT04 FEDPMT05 FEDPMT06 ///
	FEDPMT07 FEDPMT08 FEDPMT09 FEDPMT10 FEDPMT11 FEDPMT12)
rename *, lower
egen hhidpn = concat(hhid pn)
destring hhidpn, replace
keep hhidpn year rrr

* Collapse to one row per person-year (mean of rrr over any repeated rows).
collapse rrr, by(hhidpn year)

* Dummy for whether received SSI payment that year.
* The variable name is written out in full: the previous condition used the
* abbreviation rr, which only resolves while variable abbreviation is on and
* no other variable starts with rr.
gen receive_ssi_admin = rrr > 0 & rrr != .
drop rrr
sort hhidpn year
save "$data\receive_ssi_ssa", replace

* Yearly SSDI receipt indicator from the SSA administrative benefits file.
* Result: one row per hhidpn-year with receive_ssdi_admin (0/1), saved as
* receive_ssdi_ssa in the data directory.
use "R:\Restricted\SSA Administrative Data\Respondent\Benefits\stata\ben1j_r.dta", clear
rename *, lower
egen hhidpn = concat(hhid pn)
destring hhidpn, replace
keep hhidpn tob* year

* r1-r12 flag the tob01-tob12 fields that equal 2.
* NOTE(review): the twelve fields are presumably months and code 2 presumably
* the disability benefit type - confirm against the SSA codebook.
forvalues m = 1/12 {
	local mm = string(`m', "%02.0f")
	gen r`m' = tob`mm' == 2
}
egen rrr = rsum(r1-r12)

* Collapse to one row per person-year (mean of rrr over any repeated rows).
collapse rrr, by(hhidpn year)

* Dummy for whether received SSDI payment that year.
* The variable name is written out in full: the previous condition used the
* abbreviation rr, which only resolves while variable abbreviation is on and
* no other variable starts with rr.
gen receive_ssdi_admin = rrr > 0 & rrr != .
drop rrr
sort hhidpn year
save "$data\receive_ssdi_ssa", replace


* Person-level "ever received" indicators built from the two yearly files.
* Variable names are written out in full; the earlier code referred to them
* through the abbreviation receive, which depends on variable abbreviation
* being switched on.

* Ever received SSI: person mean of the yearly dummy is above zero.
use "$data\receive_ssi_ssa", clear
gcollapse receive_ssi_admin, by(hhidpn)
gen ever_received_ssi = receive_ssi_admin > 0
keep hhidpn ever_received_ssi
sort hhidpn
save "$data\ever_received_ssi", replace

* Ever received SSDI: same construction.
use "$data\receive_ssdi_ssa", clear
gcollapse receive_ssdi_admin, by(hhidpn)
gen ever_received_di = receive_ssdi_admin > 0
keep hhidpn ever_received_di
sort hhidpn
save "$data\ever_received_di", replace

* Combine the two person-level files.
use "$data\ever_received_di", clear
merge 1:1 hhidpn using "$data\ever_received_ssi"
drop _merge

* rsum treats a missing indicator (person present in only one file) as zero.
* NOTE(review): this is a row sum, so it takes the values 0, 1 or 2 rather
* than 0/1 - confirm that users of this file test it with > 0.
egen ever_received_di_or_ssi = rsum(ever_received_ssi ever_received_di)
sort hhidpn
save "$data\ever_received_di_or_ssi", replace

* Remove the two intermediate files.
erase "$data\ever_received_di.dta"
erase "$data\ever_received_ssi.dta"




* Load the longitudinal file (this is the HRS-overwritten randhrs1992_2020v1)
* and derive a few variables before the variable list is trimmed.
use "$data_ow\hrsdata", clear

* Waves 7-15: recode the .y value of hlthlm to 1.
* Original note: 7th wave has a lot of "assumed yes", replace with yes for now.
forvalues w = 7/15 {
	replace r`w'hlthlm = 1 if r`w'hlthlm == .y
}

* Recode wave 1 ADLs as dummies consistent with other waves:
* 1 becomes 0 and any non-missing value above 1 becomes 1.
* missing() is used instead of a comparison with "." because extended missing
* codes (.a to .z) sort above every number and are not equal to ".", so the
* earlier test turned them into 1.
foreach v in r1walkr r1dress r1stoop r1bed {
	replace `v' = 0 if `v' == 1
	replace `v' = 1 if `v' > 1 & !missing(`v')
	rename `v' `v'a
}

* Impute 2-year doctor visits using 1-year doctor visits.
replace r1doctim = 2*r1doctim

* Indicators built from the s-prefixed (presumably spouse) variables and
* stored under the r prefix: higov equal to 1, and a positive non-missing prpcnt.
forvalues w = 1/15 {
	generate r`w'gisp = (s`w'higov == 1)
	generate r`w'pisp = (s`w'prpcnt > 0 & s`w'prpcnt < .)
}

* Copy of the s-prefixed oopmd; set to missing in wave 1.
generate r1oopmdsp = .
forvalues w = 2/15 {
	generate r`w'oopmdsp = (s`w'oopmd)
}

* Copy of the s-prefixed oopmdo; set to missing in waves 1-9.
forvalues w = 1/9 {
	generate r`w'oopmdosp = .
}
forvalues w = 10/15 {
	generate r`w'oopmdosp = s`w'oopmdo
}


* Keep only the variables used downstream. The wildcard stands for the wave
* number in most patterns; keep leaves the column order of the file unchanged.
keep hhidpn r*issdi r*issi r*isdi rabyear rabdate r*hlthlm ///
	r*iwendy r*iwendm r*iwstat ragender raracem raeduc r*agey_b r*shlt ///
	r*jcocc r*jcten r*jcind r*jlocc r*jlten r*jlind r*mstat ///
	r*hibp r*diab r*cancr r*lung r*heart r*strok r*psych r*arthr ///
	r*oopmd r*oopmdo r*hosp r*hspnit r*walkra r*dressa r*stoopa r*bmi ///
	r*beda r*shopa r*mealsa r*proxy r*jcoccb r*jcoccc r*jlocca r*jloccb ///
	r*jloccc r*jlasty r*lbrf h*itot radyear radmonth r*doctim ///
	r*totmd r*higov r*govmr r*govmd r*govva r*mrprem r*prpcnt r*prprm* ///
	r*hiothp r*henum h*atotb h*atotf h*astck h*achck h*acd h*abond ///
	r*inlbrf inw* r*jyears r*jphys r*jlift r*jstoop r*jsight r*jstres ///
	r*covr r*gisp r*pisp r*oopmdsp r*oopmdosp

* Names without a wave number that the wildcards above also matched.
* NOTE(review): presumably exit-interview variables - confirm.
drop respagey_b redoctim rehosp rehspnit reoopmdo reoopmd regovmr regovva ///
	rehigov remrprem reprpcnt reprprm* recovr


* Balance the file so that every variable has the full sequence x1, x2, ..., x15.
* Waves in which a variable does not exist are created as all-missing.

* No IADLs in wave 1.
foreach v in r1shopa r1mealsa {
	generate `v' = .
}

* No occ b before wave 7 and no occ c before wave 10.
forvalues w = 1/9 {
	generate r`w'jcoccc = .
	generate r`w'jloccc = .
	if `w' < 7 {
		generate r`w'jcoccb = .
		generate r`w'jloccb = .
	}
}

* totmd is filled in for every wave except 3; mrprem for waves 1 and 2.
forvalues w = 1/2 {
	generate r`w'totmd = .
	generate r`w'mrprem = .
}
forvalues w = 4/15 {
	generate r`w'totmd = .
}

* Missing pieces of the prprm sequences.
forvalues w = 4/5 {
	generate r`w'prprm3 = .
}
generate r1prprm2 = .
generate r1prprm3 = .

* No expenditure data in wave 1.
generate r1oopmd = .

forvalues w = 1/9 {
	generate r`w'oopmdo = .
}

* pmbmi is filled in for waves 1-7 and wave 15.
forvalues w = 1/7 {
	generate r`w'pmbmi = .
}
generate r15pmbmi = .


* Put the wave number at the end of each name: r(wave)(stub) becomes
* r(stub)(wave) and h(wave)(stub) becomes h(stub)(wave).
local r_stubs issdi issi isdi hlthlm iwendy iwendm iwstat agey_b shlt jcocc jcten jcind ///
	jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
	hosp hspnit walkra dressa stoopa bmi beda shopa mealsa proxy ///
	jcoccb jcoccc jlocca jloccb jloccc jlasty lbrf inlbrf doctim ///
	totmd higov govmr govmd govva mrprem prpcnt prprm1 prprm2 prprm3 ///
	hiothp henum oopmd oopmdo pmbmi jyears jphys jlift jstoop ///
	jsight jstres covr gisp pisp oopmdsp oopmdosp

forvalues w = 1/15 {
	foreach s of local r_stubs {
		rename r`w'`s' r`s'`w'
	}
	* Interview date built from interview-end month and year, day fixed at 1.
	generate interview_date`w' = mdy(riwendm`w', 1, riwendy`w')
}

* Same renaming for the h-prefixed variables.
forvalues w = 1/15 {
	foreach s in atotb astck achck acd abond itot atotf {
		rename h`w'`s' h`s'`w'
	}
}


* Add the health-condition file, clean death dates, compute first and last
* interview year, attach tracker demographics and save the wide file temp1.

merge 1:1 hhidpn using "$data\hrs_healthcond.dta"
drop if _merge == 2
drop _merge

* Coverage variables are created as missing for waves 1 and 2.
forvalues w = 1/2 {
	foreach c in hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov {
		gen `c'`w' = .
	}
}

* Out-of-range death dates become system missing; extended missing codes
* also satisfy the > test and are set to "." here.
replace radyear = . if radyear > 3000
replace radmonth = . if radmonth > 13

rename radyear year_died
rename radmonth month_died

* NOTE(review): the riwendy1-riwendy15 range relies on those 15 variables
* being adjacent in the column order - confirm.
egen firstyear = rowmin(riwendy1-riwendy15)
lab var firstyear "first interview year"

* lastyear is the last year in which the person is observed: the final survey
* year for someone interviewed in the last wave, an earlier year for people
* who leave the sample for whatever reason. Combined with year_died it
* identifies who left because they died and who left for other reasons.
egen lastyear = rowmax(riwendy1-riwendy15)
lab var lastyear "last year observed"

* Interview month of the wave whose interview year equals lastyear
* (the highest such wave wins if several share that year).
gen lastmonth = .
forvalues w = 1/15 {
	replace lastmonth = riwendm`w' if riwendy`w' == lastyear
}
lab var lastmonth "last month observed"

* Tracker demographics. The match type is stated explicitly (1:1 on hhidpn)
* instead of the deprecated merge syntax without a match type, so duplicate
* keys on either side now stop the run instead of merging silently.
sort hhidpn
merge 1:1 hhidpn using "$data\track_demogs"
drop if _merge == 2
drop _merge
drop hhid pn

save "$data\temp1", replace

* The dataset saved above holds its variables as balanced sequences
* x1, x2, ..., x15 (missing when the variable is not in the original),
* except for: hhidpn, ragender, raracem, rabyear, rabdate,
* month_died, year_died, raeduc, firstyear, lastyear, lastmonth.

* Put an underscore between the prprm stub (which itself ends in a digit)
* and the wave number, so the wave number can be split off unambiguously.
forvalues w = 1/15 {
	forvalues k = 1/3 {
		rename rprprm`k'`w' rprprm`k'_`w'
	}
}

* Reshape from wide (one row per person) to long (one row per person-wave).
* The stub rgovmd replaces the earlier rovmd, which matched no variable:
* rgovmd1-rgovmd15 (kept and renamed earlier in this file) were therefore
* never reshaped.
reshape long ///
	rmstat    rdiab    rdoctim rissdi  rhiothp  rjcocc  rjlasty rtotmd ///
	rproxy    rcancr   roopmd  risdi   rjcoccb  rinlbrf interview_date ///
	riwstat   rlung    roopmdo hitot   rmrprem  rjcoccc rhlthlm rhlthlm_direct ///
	ragey_b   rheart   rpmbmi  rissi   rprpcnt  rjcind  rwalkra rhlth_temp ///
	riwendm   rstrok   hastck  rhenum  rprprm1_ rjlten  rstoopa rcantwork ///
	riwendy   rpsych   habond  rhigov  rprprm2_ rjlocc  rdressa ///
	rshlt     rarthr   hachck  rgovmr  rprprm3_ rjlocca rjloccb rbeda ///
	rbmi      rhosp    hacd    rgovmd  rlbrf    rjloccc rmealsa ///
	rjyears   rjphys   rjlift  rjstoop rjsight  rjstres ///
	rhibp     rhspnit  hatotb  hatotf  rgovva   rjcten  rjlind  rshopa inw rcovr ///
	hospcov   nurscov  surgcov doctcov dentcov  drugcov homecov helpcov ///
	rgisp     rpisp    roopmdsp roopmdosp ///
	, i(hhidpn) j(wave)

* "Official" HRS year of each wave, even though some people may be
* interviewed a year later. Waves 2 and 3 of the AHEAD cohort (study 11)
* are dated 1993 and 1995.
generate year = 1990 + 2*wave
replace year = 1993 if wave == 2 & study == 11
replace year = 1995 if wave == 3 & study == 11

* Keep only the waves in which the person is a respondent.
keep if inw == 1

generate age = year - rabyear

* Binary race variable: 1 if raracem is 1, 0 if it is 2 or 3, missing otherwise.
generate white = 1 if raracem == 1
replace white = 0 if inlist(raracem, 2, 3)
label define white 1 "White" 0 "Non-white"
label values white white

* Binary gender variable: 1 if ragender is 2, 0 if it is 1.
generate female = 1 if ragender == 2
replace female = 0 if ragender == 1
label define female 1 "female" 0 "male"
label values female female

* Binary education variable: raeduc 1-3 versus 4-5.
generate college = 0 if inlist(raeduc, 1, 2, 3)
replace college = 1 if inlist(raeduc, 4, 5)
label define college 0 "At most High School degree" 1 "Some college or more"
label values college college

* Longitudinal HRS for the variables of interest, in long format.
compress
sort hhidpn wave
save "$data\HRS_RAND", replace
  
******************************************************************************************************
* Load the F831/income file and attach the wide HRS file (temp1) to every row.
cd "$data"
use f831_and_inc.dta, clear
merge m:1 hhidpn using "$data\temp1"

* Keep matched rows only.
* Note: when doing regressions about whether to apply or not / whether to
* report disability, we are selecting on "consenters".
keep if _merge == 3
drop _merge

* Replace missing income with 0, then set the measures to missing for years
* more than one year after the year of death.
replace w2earn = 0 if w2earn == .
replace earn   = 0 if earn == . | earn == .m
foreach v in w2earn earn experience experience_alt {
	replace `v' = . if year > year_died + 1 & year_died != .
}

* Find the closest interview after the F831 application
* (i.e. merge in interview data by application year).
generate wave_appl = .
scalar define month_threshold = 12

* dist(wave): days separating the interview date from the application date.
* Only interviews that happen after the application are of interest, so
* negative distances are set to missing.
forvalues w = 1/15 {
	generate dist`w' = interview_date`w' - date_app
	replace dist`w' = . if dist`w' < 0
}
egen row_min = rowmin(dist1-dist15)

* row_min is, for each F831 record (43 years of SSA data), the minimum
* distance across the 15 HRS wave interviews.
* Example: with an application date of 1/1/2004 and HRS interviews on
* 4/1/2002, 4/1/2004 and 4/1/2006, the dist variables are "." (interview
* before the application), 91 and 821, so row_min is 91.

drop ragey* riwend*

*** The merged dataset has 43 rows (1978 to 2020) for each F831 individual
*** record, or just 43 rows for people with SSA records but no F831 record.
*** Every row carries the wide RAND variables x1, x2, ..., x15 (one per wave).
*** The commands below create a single variable x from them.

********************************************************************************************************************************************************************
**** Variables with suffix _appl hold the value from the HRS wave whose
**** interview is closest in time after the application date.

* Every stub that gets an _appl variable, in creation order.
local all_stubs shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
	hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
	jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
	jyears jphys jlift jstoop jsight jstres hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov gisp pisp ///
	prprm1 prprm2 prprm3 hiothp henum atotb atotf astck achck acd abond pmbmi iwstat itot issdi isdi issi interview_date inlbrf inw covr

* Stubs whose wide source variables carry the r prefix.
local r_stubs shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
	hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
	jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
	jyears jphys jlift jstoop jsight jstres gisp pisp ///
	prprm1 prprm2 prprm3 hiothp henum pmbmi iwstat issdi isdi issi inlbrf covr

* Stubs without a prefix, and stubs with the h prefix.
local cov_stubs hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov
local h_stubs atotb astck achck acd abond itot atotf

foreach s of local all_stubs {
	quietly generate `s'_appl = .
}

* Copy from the wave whose distance equals the row minimum.
forvalues w = 1/15 {
	foreach s of local r_stubs {
		quietly replace `s'_appl = r`s'`w' if dist`w' == row_min & row_min != .
	}
}

forvalues w = 1/15 {
	foreach s of local cov_stubs {
		quietly replace `s'_appl = `s'`w' if dist`w' == row_min & row_min != .
	}
}

* Codes 3 and 4 of the condition variables are folded into 1 and 0:
* 3 = disputes previous report and now has condition,
* 4 = disputes previous report and now does not.
foreach c in hibp diab cancr lung heart strok psych arthr {
	quietly replace `c'_appl = 1 if `c'_appl == 3
	quietly replace `c'_appl = 0 if `c'_appl == 4
}

forvalues w = 1/15 {
	foreach s of local h_stubs {
		quietly replace `s'_appl = h`s'`w' if dist`w' == row_min & row_min != .
	}
}

* Wave number, respondent flag and interview date of the chosen interview.
forvalues w = 1/15 {
	quietly replace wave_appl = `w' if dist`w' == row_min & row_min != .
	quietly replace inw_appl = inw`w' if dist`w' == row_min & row_min != .
	quietly replace interview_date_appl = interview_date`w' if dist`w' == row_min & row_min != .
}
********************************************************************************************************************************************************************


********************************************************************************************************************************************************************
**** Variables with suffix _intvw hold the value from the HRS wave whose
**** interview falls in the calendar year of the row.

* Every stub that gets an _intvw variable, in creation order.
local all_stubs shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
	hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
	jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
	jyears jphys jlift jstoop jsight jstres hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov gisp pisp ///
	prprm1 prprm2 prprm3 hiothp henum atotb atotf astck achck acd abond pmbmi iwstat itot issdi isdi issi interview_date inlbrf inw covr

* Stubs whose wide source variables carry the r prefix.
local r_stubs shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
	hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
	jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
	jyears jphys jlift jstoop jsight jstres gisp pisp ///
	prprm1 prprm2 prprm3 hiothp henum pmbmi iwstat issdi isdi issi inlbrf covr

* Stubs without a prefix, and stubs with the h prefix.
local cov_stubs hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov
local h_stubs atotb astck achck acd abond itot atotf

foreach s of local all_stubs {
	quietly generate `s'_intvw = .
}

* Calendar year of each wave's interview date.
forvalues w = 1/15 {
	quietly generate iy`w' = year(interview_date`w')
}

* Copy from the wave interviewed in the row's calendar year.
forvalues w = 1/15 {
	foreach s of local r_stubs {
		quietly replace `s'_intvw = r`s'`w' if iy`w' == year
	}
}

forvalues w = 1/15 {
	foreach s of local cov_stubs {
		quietly replace `s'_intvw = `s'`w' if iy`w' == year
	}
}

* Codes 3 and 4 of the condition variables are folded into 1 and 0:
* 3 = disputes previous report and now has condition,
* 4 = disputes previous report and now does not.
foreach c in hibp diab cancr lung heart strok psych arthr {
	quietly replace `c'_intvw = 1 if `c'_intvw == 3
	quietly replace `c'_intvw = 0 if `c'_intvw == 4
}

forvalues w = 1/15 {
	foreach s of local h_stubs {
		quietly replace `s'_intvw = h`s'`w' if iy`w' == year
	}
}

* Respondent flag and interview date of the matching wave.
forvalues w = 1/15 {
	quietly replace inw_intvw = inw`w' if iy`w' == year
	quietly replace interview_date_intvw = interview_date`w' if iy`w' == year
}
********************************************************************************************************************************************************************


* Some variable renaming.
* hlthlm_appl, hlthlm_direct_appl, hlth_temp_appl, cantwork_appl and
* proxy_appl keep their names; the earlier statements that renamed each of
* them to itself have been removed.
ren shlt_appl             hlt_appl
ren interview_date_appl   intvw_dt_appl
ren interview_date_intvw  intvw_dt_intvw
ren jcocc_appl            curr_occ_appl
ren jcten_appl            curr_tenure_appl
ren jcind_appl            curr_ind_appl
ren jlten_appl            longest_tenure_appl
ren jlind_appl            longest_ind_appl
ren mstat_appl            mar_stat_at_appl
ren jcoccb_appl           curr_occ_b_appl
ren jcoccc_appl           curr_occ_c_appl
ren jlocc_appl            longest_occ_appl
ren jloccb_appl           longest_occ_b_appl
ren jloccc_appl           longest_occ_c_appl
ren jlasty_appl           last_lfp_yr_appl
ren lbrf_appl             lfp_status_appl
ren doctim_appl           doc_visits_appl

* row_min here is the minimum number of days from application to interview.
ren row_min               days_to_interview_appl
lab var days_to_interview_appl "Days between interview and application"

* Attach the value labels OCCUP, OCCUPB, OCCUPC and LBRF, which are expected
* to be present in the data already.
label val curr_occ_appl       OCCUP
label val curr_occ_b_appl     OCCUPB
label val curr_occ_c_appl     OCCUPC
label val longest_occ_appl    OCCUP
label val longest_occ_b_appl  OCCUPB
label val longest_occ_c_appl  OCCUPC
label val lfp_status_appl     LBRF


* 12-month window after application: flags records whose closest later
* interview is fewer than month_threshold (= 12) times 31 days after the
* application. These are F831 records within 12 months of application,
* times 43 years.
generate within_threshold_appl = (days_to_interview_appl < month_threshold*31 & days_to_interview_appl != .)

* 6- and 9-month windows after application, built the same way.
foreach m in 6 9 {
	generate within_`m'mo_appl = (days_to_interview_appl < `m'*31 & days_to_interview_appl != .)
}

* Indicator for doctor-diagnosed health condition, built once from the
* _appl variables and once from the _intvw variables.
* 0 requires all eight conditions to equal 0. When several conditions equal 1
* the highest code wins, because later replacements overwrite earlier ones.
foreach t in appl intvw {
	generate bs_sf_`t' = .
	replace bs_sf_`t' = 0 if hibp_`t'==0 & psych_`t'==0 & heart_`t'==0 & arthr_`t'==0 & diab_`t'==0 & lung_`t'==0 & strok_`t'==0 & cancr_`t'==0

	* Codes 1-8 follow the order of this list.
	local code = 0
	foreach c in hibp psych heart arthr diab lung strok cancr {
		local ++code
		replace bs_sf_`t' = `code' if `c'_`t' == 1
	}

	label define bs_sf 0 "None" 1 "High BP" 2 "Psych.cond." 3 "Heart" 4 "Arthritis" 5 "Diabetis" ///
		6 "Lung disease" 7 "Stroke" 8 "Cancer", replace
	label values bs_sf_`t' bs_sf
}


* The _appl variables are used for the Type I/Type II error regressions.
* This section instead finds the closest interview to a given year
* (i.e. merges in interview data by calendar year).

********************************************************************************************************************************************************************
**** Variables filled with the value from the wave of the HRS interview
**** (original note: NOT SURE IF STILL USED!!!)

generate wave = .

* Names of the calendar-year variables; the wide sources carry the same name
* followed by the wave number.
local yr_vars rissdi rissi risdi rshlt rhlthlm interview_date rmstat rhibp rdiab rcancr rlung rheart rstrok rpsych rarthr ///
	rhosp rhspnit rwalkra rdressa rstoopa rbmi rbeda rshopa rmealsa rhlthlm_direct rhlth_temp rcantwork rproxy ///
	rjcocc rjcoccb rjcoccc rjlocc rjloccb rjloccc rjlasty rlbrf rdoctim roopmd roopmdo rtotmd rhigov rgovmr rgovmd rgovva rmrprem rprpcnt ///
	rjyears rjphys rjlift rjstoop rjsight rjstres hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov ///
	rprprm1 rprprm2 rprprm3 rhiothp rhenum hatotb hastck hachck hacd habond hatotf rcovr rgisp rpisp roopmdsp roopmdosp

foreach v of local yr_vars {
	quietly generate `v' = .
}

* ddist(wave): interview year minus calendar year. Only interviews in the
* same or a later calendar year are of interest, so negatives become missing.
forvalues w = 1/15 {
	quietly generate ddist`w' = year(interview_date`w') - year
	quietly replace ddist`w' = . if ddist`w' < 0
}
egen row_min = rowmin(ddist1-ddist15)

* Copy from the wave at the minimum distance. The wave assignment does not
* depend on the variable being filled, so it runs once per wave instead of
* once per variable as before; the result is the same.
forvalues w = 1/15 {
	foreach v of local yr_vars {
		quietly replace `v' = `v'`w' if ddist`w' == row_min & row_min != .
	}
	quietly replace wave = `w' if ddist`w' == row_min & row_min != .
}

* The variables above are set to x(wave 1) for years up to 1992, x(wave 2)
* for 1993-94, x(wave 3) for 1995-96, ..., x(wave 14) for 2017-2018, unless
* the interview took place in a different year than the official wave year.
* Before 1992, individual variables are set to the HRS 1992 value.
* For some variables the odd-year records do not exist, so they take the
* value of the next HRS wave (so the 1993 value = the 1994 HRS wave value).

* Variables that simply lose their leading r.
foreach s in hlthlm hlthlm_direct hlth_temp cantwork proxy oopmd oopmdo covr ///
		jlift jyears jphys jstoop jsight jstres gisp pisp ///
		hibp diab cancr lung heart strok psych arthr ///
		hosp hspnit walkra dressa stoopa bmi beda shopa mealsa {
	rename r`s' `s'
}

* Variables that get a descriptive name.
rename interview_date intvw_dt
rename rmstat         marital_status
rename rissdi         received_ssidi
rename rissi          received_ssi
rename risdi          received_di
rename rjcocc         curr_occ
rename rjcoccb        curr_occ_b
rename rjcoccc        curr_occ_c
* longest_occ uses the 1980 census definition, _b the 2000 and _c the 2010 one.
rename rjlocc         longest_occ
rename rjloccb        longest_occ_b
rename rjloccc        longest_occ_c
rename rjlasty        last_lfp_yr
rename rlbrf          lfp_status
rename rdoctim        doc_visits

label values curr_occ      OCCUP
label values curr_occ_b    OCCUPB
label values curr_occ_c    OCCUPC
label values longest_occ   OCCUP
label values longest_occ_b OCCUPB
label values longest_occ_c OCCUPC
label values lfp_status    LBRF

* Codes 3 and 4 of the condition variables are folded into 1 and 0:
* 3 = disputes previous report and now has condition,
* 4 = disputes previous report and now does not.
foreach c in hibp diab cancr lung heart strok psych arthr {
	quietly replace `c' = 1 if `c' == 3
	quietly replace `c' = 0 if `c' == 4
}

* Calendar-year condition indicator: 0 requires all eight conditions to be 0;
* codes 1-8 follow the order of the list and the highest code wins.
generate bs_sf = .
replace bs_sf = 0 if hibp==0 & psych==0 & heart==0 & arthr==0 & diab==0 & lung==0 & strok==0 & cancr==0
local code = 0
foreach c in hibp psych heart arthr diab lung strok cancr {
	local ++code
	replace bs_sf = `code' if `c' == 1
}
label values bs_sf bs_sf

* row_min here is the number of years from the calendar year to the closest
* interview in the same or a later year.
rename row_min yrs_to_interview

* Flag if there is an interview in the same or the next calendar year.
generate within_threshold = (yrs_to_interview < 2 & yrs_to_interview != .)

* Coverage variables: move cov to the front of the name.
foreach c in hosp nurs surg doct dent drug home help {
	rename `c'cov       cov`c'
	rename `c'cov_appl  cov`c'_appl
	rename `c'cov_intvw cov`c'_intvw
}


* Drop the variables that still match these prefixes once the calendar-year
* versions have been renamed. The five commands are kept separate and in
* their original order.
drop riss* risdi* rshl* rhl* interview_date* dist* ///
	rjc* rjl* rmst* riwstat* rcantwork*
drop rhibp* rdiab* rcancr* rlung* ///
	rheart* rstrok* rpsych* rarthr*
drop rhosp* rhspnit* rwalkra* rdressa* rstoopa* ///
	rbmi* rbeda* rshopa* rmealsa*
drop rproxy* rlbrf* rdoctim* roopmd* roopmdo* ///
	rjyears* rjphys* rjstoop* rjsight* rjstres* rgisp* rpisp*
drop hospcov* nurscov* surgcov* doctcov* ///
	dentcov* drugcov* homecov* helpcov*
********************************************************************************************************************************************************************


**** Create some new variables and clean up demographics.

* Calendar year of the interview, filled in only on rows whose year
* coincides with an interview year.
generate interview_year = .
forvalues w = 1/15 {
	replace interview_year = iy`w' if iy`w' == year
}

* Obesity (BMI of 30 or more) and underweight (BMI below 18.5) dummies for
* the three BMI versions; left missing when BMI is missing or 300 and above.
foreach b in bmi_appl bmi_intvw bmi {
	* Suffix of the BMI variable: _appl, _intvw or empty.
	local sfx = substr("`b'", 4, .)
	generate obese`sfx'   = `b' >= 30  if `b' < 300
	generate underwt`sfx' = `b' < 18.5 if `b' < 300
}


* Clean up demographics: for people with missing education, assign the
* modal value within person.
sort hhidpn raeduc, stable
by hhidpn (raeduc): egen mraeduc = mode(raeduc)

* When no mode is returned, fall back on the person's first (lowest) value.
sort hhidpn mraeduc raeduc, stable
by hhidpn mraeduc (raeduc): replace mraeduc = raeduc[1] if mraeduc == .
replace raeduc = mraeduc
drop mraeduc

* Construct age; the existing dob_y is replaced by the RAND birth year.
generate age = year - rabyear
drop rabdate dob_y
rename rabyear dob_y

sort hhidpn record year rid al, stable

* Regroup bs: codes 2, 7, 8, 10, 19, 20 and 99 become 16 ("Other disability")
* and codes 5 and 6 become 15.
replace bs = 16 if inlist(bs, 2, 7, 8, 10, 20, 19, 99)
replace bs = 15 if inlist(bs, 5, 6)

label define bs 1 "Musculoskeletal" 3 "Respiratory" 4 "Cardiov." 15 "Dig. \& Urin." 16 "Other" ///
	9 "Endocrine" 11 "Neurol." 12 "Mental dis." 13 "Cancer" 14 "Immune def.", replace
label values bs bs

* Binary education variable: raeduc 1-3 versus 4-5.
generate college = 0 if inlist(raeduc, 1, 2, 3)
replace college = 1 if inlist(raeduc, 4, 5)
label define college 0 "At most High School degree" 1 "Some college or more"
label values college college

* Binary race variable. Three respondents are first recoded to 3 when their
* raracem is above 3 or missing (from confidential race data).
replace raracem = 3 if raracem > 3 & inlist(hhidpn, 521804020, 909765010, 902295020)

generate white = 1 if raracem == 1
replace white = 0 if inlist(raracem, 2, 3)
label define white 1 "White" 0 "Non-white"
label values white white

* Binary gender variable: 1 if ragender is 2, 0 if it is 1.
generate female = 1 if ragender == 2
replace female = 0 if ragender == 1
label define female 1 "female" 0 "male"
label values female female


* Binary marital status variables (married: code 1, widowed: code 7) at the
* calendar-year, application and interview level.
* The zero category uses !missing() instead of a comparison with ".":
* extended missing codes (.a to .z) are not equal to ".", so the earlier
* test coded people with such a marital status as unmarried / not widowed.
* They are now left missing.

* Calendar-year level
gen married = .
replace married = 1 if marital_status == 1
replace married = 0 if marital_status != 1 & !missing(marital_status)
lab def married 1 "married" 0 "unmarried"
lab values married married
gen widowed = .
replace widowed = 1 if marital_status == 7
replace widowed = 0 if marital_status != 7 & !missing(marital_status)
lab def widowed 1 "widowed" 0 "not widowed"
lab values widowed widowed

* Application level
gen married_appl = .
replace married_appl = 1 if mar_stat_at_appl == 1
replace married_appl = 0 if mar_stat_at_appl != 1 & !missing(mar_stat_at_appl)
lab def married_appl 1 "married" 0 "unmarried"
lab values married_appl married_appl
gen widowed_appl = .
replace widowed_appl = 1 if mar_stat_at_appl == 7
replace widowed_appl = 0 if mar_stat_at_appl != 7 & !missing(mar_stat_at_appl)
lab def widowed_appl 1 "widowed" 0 "not widowed"
lab values widowed_appl widowed_appl

* Interview level
ren mstat_intvw mar_stat_at_intvw
gen married_intvw = .
replace married_intvw = 1 if mar_stat_at_intvw == 1
replace married_intvw = 0 if mar_stat_at_intvw != 1 & !missing(mar_stat_at_intvw)
lab def married_intvw 1 "married" 0 "unmarried"
lab values married_intvw married_intvw
gen widowed_intvw = .
replace widowed_intvw = 1 if mar_stat_at_intvw == 7
replace widowed_intvw = 0 if mar_stat_at_intvw != 7 & !missing(mar_stat_at_intvw)
lab def widowed_intvw 1 "widowed" 0 "not widowed"
lab values widowed_intvw widowed_intvw


* Binary SSI application variable: 1 when rid is 16, 0 when rid is 2 or 216,
* missing otherwise.
generate ssi = 1 if rid == 16
replace ssi = 0 if inlist(rid, 2, 216)
label variable ssi "SSI"

* Label some variables.
label variable experience     "Experience using y"
label variable experience_alt "Experience using y+w2"
label variable success        "Awarded"
label variable nosuccess      "Not awarded"
label variable bs_sf_intvw    "Health cond. reported to HRS"
label variable bs_sf_appl     "Health cond. reported to HRS"

merge m:1 year using $in\cpi.dta
drop if _merge==2
drop _merge

// Construct household income measures and merge them back by hhid-year.
//   hh_inc / hh_inc_real : household sum of max(w2earn, earn), nominal / CPI-deflated
//   hh_ern / hh_ern_real : household sum of earn,              nominal / CPI-deflated
// The intermediate file is a real Stata tempfile (removed automatically when
// the do-file ends). The original issued a bare "tempfile" with no name and
// then wrote temphh.dta into the working directory, erasing it by hand.
tempfile hh_income
preserve
	keep w2earn earn cpi hhidpn year
	* one row per person-year so nobody's earnings are counted twice
	duplicates drop hhidpn year, force
	* NOTE(review): assumes int(hhidpn/100) is constant within a household,
	* i.e. depends on the width of pn inside hhidpn -- confirm
	gen hhid = int(hhidpn/100)
	egen maxy   = rowmax(w2earn earn)
	* "missing": household total stays missing when every member's value is missing
	egen hh_inc = total(maxy), by(hhid year) missing
	egen hh_ern = total(earn), by(hhid year) missing
	gcollapse (mean) hh_inc hh_ern cpi, by(hhid year)
	gen hh_inc_real = hh_inc/(cpi/100)
	gen hh_ern_real = hh_ern/(cpi/100)
	keep hh_* hhid year
	sort hhid year
	save `hh_income'
restore

gen hhid = int(hhidpn/100)
sort hhid year
merge m:1 hhid year using `hh_income', nogenerate

* Convert nominal dollar amounts to real terms by dividing by cpi/100.
* The nominal variable is dropped once its *_real counterpart exists.

* individual earnings
foreach nominal of varlist w2earn earn {
	generate `nominal'_real = `nominal'/(cpi/100)
	drop `nominal'
}

* amounts measured at application (_appl) and at interview (_intvw)
local money_stems oopmd oopmdo oopmdsp oopmdosp totmd mrprem prprm1 prprm2 prprm3 ///
	atotb atotf astck achck acd abond itot issdi isdi issi
foreach stem of local money_stems {
	foreach timing in appl intvw {
		quietly generate `stem'_`timing'_real = `stem'_`timing'/(cpi/100)
	}
	drop `stem'_appl `stem'_intvw
}


** combine longest tenure occupation into one for applicants
** generate longest tenure occupation using the earliest occupation codes
**
** For each version (appl, intvw, x) the combined code starts from
** longest_occ_*; where that is .b it is filled from the census-2000 coded
** variable (longest_occ_b_*) and, if still .b, from the census-2010 coded
** variable (longest_occ_c_*). ONET principal components are then merged on
** the combined code and suffixed with the version.

ren jlocc_intvw 	longest_occ_intvw
ren jloccb_intvw 	longest_occ_b_intvw
ren jloccc_intvw 	longest_occ_c_intvw

ren longest_occ     longest_occ_x
ren longest_occ_b   longest_occ_b_x
ren longest_occ_c   longest_occ_c_x

foreach i in appl intvw x {
	gen 	longest_occ_combined_`i' = longest_occ_`i'
	label val longest_occ_combined_`i' OCCUP

	local occ longest_occ_combined_`i'
	local b   longest_occ_b_`i'
	local c   longest_occ_c_`i'

	*census 2000
	replace `occ'=1  if `occ'==.b & inlist(`b',1,2,3)
	replace `occ'=2  if `occ'==.b & inlist(`b',4,5,6,7,8,9,10,11)
	replace `occ'=3  if `occ'==.b & `b'==17
	replace `occ'=4  if `occ'==.b & `b'==18
	replace `occ'=5  if `occ'==.b & `b'==15
	replace `occ'=6  if `occ'==.b & `b'==13
	replace `occ'=7  if `occ'==.b & `b'==14
	replace `occ'=8  if `occ'==.b & `b'==12
	replace `occ'=9  if `occ'==.b & `b'==16
	replace `occ'=10 if `occ'==.b & `b'==19
	replace `occ'=11 if `occ'==.b & `b'==22
	replace `occ'=12 if `occ'==.b & inlist(`b',20,21)
	replace `occ'=13 if `occ'==.b & `b'==23
	replace `occ'=15 if `occ'==.b & `b'==24
	replace `occ'=17 if `occ'==.b & `b'==25

	*census 2010 (only reached when the census-2000 code did not fill the value)
	replace `occ'=1  if `occ'==.b & inlist(`c',1,2)
	replace `occ'=2  if `occ'==.b & inlist(`c',3,4,5,6,7,8,9,10)
	replace `occ'=3  if `occ'==.b & `c'==16
	replace `occ'=4  if `occ'==.b & `c'==17
	replace `occ'=5  if `occ'==.b & `c'==14
	replace `occ'=6  if `occ'==.b & `c'==12
	replace `occ'=7  if `occ'==.b & `c'==13
	replace `occ'=8  if `occ'==.b & `c'==11
	replace `occ'=9  if `occ'==.b & `c'==15
	replace `occ'=10 if `occ'==.b & `c'==18
	replace `occ'=11 if `occ'==.b & `c'==20
	replace `occ'=12 if `occ'==.b & `c'==19
	replace `occ'=13 if `occ'==.b & `c'==21
	replace `occ'=15 if `occ'==.b & `c'==22
	replace `occ'=17 if `occ'==.b & `c'==23

	* Stata missing values compare greater than any number, so this also
	* sets every remaining missing code to 0.
	* NOTE(review): presumably intended -- confirm
	replace `occ'= 0 if `occ' > 3000

	gen SOC = `occ'

	* Remember which variables exist before the merge so that only the
	* freshly merged ONET components receive this iteration's suffix.
	* The original "ren Phys* Phys*_`i'" also re-matched components suffixed
	* in earlier iterations, producing names such as *_appl_intvw_x.
	qui ds
	local vars_before `r(varlist)'

	* add ONET principal components for physical and cognitive tasks;
	* SOC codes that appear only in the ONET file are not added as rows
	* (the original kept them, appending rows with no person data)
	merge m:1 SOC using $in\ONETpca, keepusing(Phys* Ksa*) keep(master match) nogenerate

	qui ds Phys* Ksa*
	local onet_all `r(varlist)'
	local onet_new : list onet_all - vars_before
	foreach v of local onet_new {
		ren `v' `v'_`i'
	}
	drop SOC
}


* drop intermediate variables that are not part of the final dataset
drop 	inw* rpmbmi*   hastck*   habond*   hachck*   hacd*     hatotb* hatotf*  hitot*    rhenum* ///
		rhigov*   rgovmr*   rgovmd*   rgovva*   rhiothp*  rprprm*   rmrprem*  rprpcnt*  rprprm1* ///
		rprprm2*  rprprm3*  rinlbrf*  rtotmd*   iy*       ddist*  rcovr*

compress

* Attach the SSA administrative receipt dummies (one row per hhidpn-year in
* each using file). Uses merge m:1 like the other merges in this file instead
* of the deprecated "merge varlist using" syntax; m:1 also stops with an error
* if a using file is ever not unique on hhidpn-year. Person-years found only
* in the SSA files are not added.
merge m:1 hhidpn year using $data\receive_ssi_ssa, keep(master match) nogenerate
merge m:1 hhidpn year using $data\receive_ssdi_ssa, keep(master match) nogenerate


sort hhidpn record year

format hhidpn %12.0g

save complete_data_final_v1, replace



****************************************
****************************************

****************************************
****************************************
* final dataset is complete
* has at least one obs per person per year, with data on income, f831 application,
* and if rand interview happened after f831 application, then also closest-in-time interview data
* note that if more than one f831 application or appeal in given year, then there are 
* multiple rows per person per year (one for each f831 application/appeal)

***Primary earner data, to be used later
*****************************************************************************************
* Builds primary_earner.dta with, per hhidpn-record-year:
*   avgy           : mean of the person's real earnings over the previous five years
*   primary_earner : 1 when avgy exceeds half of the matching household average
cd $data
use complete_data_final_v1.dta, clear

* sample: ages 20 through 65, no years after the recorded year of death
keep if inrange(age, 20, 65)
drop if year>year_died

* missing earnings and missing record ids are set to zero
foreach v of varlist earn_real hh_ern_real record {
	replace `v'=0 if `v'==.
}

* one panel per person-record pair so the lag operators can be used
egen fff=group(hhidpn record)
xtset fff year

* build "L1.x+L2.x+...+L5.x" for own and household earnings
local own_lags L1.earn_real
local hh_lags  L1.hh_ern_real
forvalues k=2/5 {
	local own_lags `own_lags'+L`k'.earn_real
	local hh_lags  `hh_lags'+L`k'.hh_ern_real
}
generate avgy=(`own_lags')/5
generate avghy=(`hh_lags')/5

* 0/0 evaluates to missing, which compares as >0.5, so the case where both
* averages are zero is excluded explicitly
generate primary_earner=(avgy/avghy)>0.5 & avgy!=. & avghy!=. & !(avgy==0 & avghy==0)

keep hhidpn year record avgy primary_earner
sort hhidpn year record
save primary_earner,replace
*****************************************************************************************

* remove an intermediate file that is no longer needed, then close the log
erase hrs_healthcond.dta

log close